# -*- coding: gbk -*-

import cgi
import wsgiref.handlers
import GetMainContentHelper
import re


from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.api import urlfetch


class MainPage(webapp.RequestHandler):
  """Handler for the site root: serves the URL submission form."""

  def get(self):
    """Write the static HTML page whose form POSTs a 'url' field to /getcontent."""
    form_page = """
      <html>
        <body>
          <center>
            <h3>Get Main Content</h3>
                  <form action="/getcontent" method="post">
                    <tr>
                      <div>
                           <td>input an url: </td>
                           <td><input type="text" name="url" size="60" /></td>
                           <td><input type="submit" value="Go!"></td>
                      </div>
                    </tr>
                  </form>
          </center>
        </body>
      </html>"""
    body_stream = self.response.out
    body_stream.write(form_page)
 


class GetData(webapp.RequestHandler):
  """Handler for POST /getcontent.

  Reads the 'url' form field, passes it through GetMainContentHelper to
  obtain a title and main content, and returns both as UTF-8 encoded HTML.
  """

  # Lower-cased charset names whose bytes are already UTF-8 (no transcoding).
  _UTF8_NAMES = ('utf-8', 'utf8')

  def post(self):
    """Fetch the submitted URL and write its title and main content.

    Responds with HTTP 400 when the 'url' field is missing or blank.
    Otherwise the title and the content are each written followed by a
    newline, transcoded to UTF-8 when the helper reports another charset.
    """
    # Use the raw value for fetching: HTML-escaping a URL rewrites '&' in
    # query strings to '&amp;' and would request a different resource.
    url = self.request.get('url').strip()
    if not url:
      self.error(400)
      self.response.out.write('Missing "url" parameter.')
      return

    data = GetMainContentHelper.getDataFromUrl(url)
    content = GetMainContentHelper.getMainContent(data)
    # getTitle()/getCharset() take no arguments and are called right after
    # getMainContent(), as in the original flow; presumably they describe
    # the page just processed -- confirm against GetMainContentHelper.
    title = GetMainContentHelper.getTitle()
    # Normalise so that e.g. 'UTF-8' or a missing charset is handled
    # predictably instead of silently falling into the wrong branch.
    charset = (GetMainContentHelper.getCharset() or '').strip().lower()

    # Write through the framework response rather than printing to stdout,
    # so the body and the headers produced by webapp stay consistent.
    self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
    out = self.response.out
    out.write(self._to_utf8(title, charset))
    out.write('\n')
    out.write(self._to_utf8(content, charset))
    out.write('\n')

  @staticmethod
  def _to_utf8(text, charset):
    """Return the byte string `text` re-encoded as UTF-8.

    `charset` is the lower-cased source charset name. UTF-8 input is
    returned unchanged; 'gb2312' is decoded as GB2312; anything else is
    decoded as GBK. Undecodable bytes are dropped ('ignore').
    """
    if charset in GetData._UTF8_NAMES:
      return text
    if charset == 'gb2312':
      source_encoding = 'gb2312'
    else:
      source_encoding = 'gbk'
    return text.decode(source_encoding, 'ignore').encode('utf-8', 'ignore')
    
def main():
  """Build the WSGI application for both routes and run it via the CGI handler."""
  routes = [
      ('/', MainPage),
      ('/getcontent', GetData),
  ]
  app = webapp.WSGIApplication(routes, debug=True)
  cgi_handler = wsgiref.handlers.CGIHandler()
  cgi_handler.run(app)
  
 # GetMainContentHelper.getMainContent(url) +
  

# Start the application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()
